"""
Chi Square Feature Extraction Based SVMs Arabic Language Text Categorization System
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.109.1652&rep=rep1&type=pdf
"""

def calculatechivalue(docdic,terms):
    """
    Score each term with a chi-square style statistic over a two-class corpus.

    @param docdic dictionary from docid to dic2
        dic2['pr'] is the class label: 0 is one class, any other value is
            counted as the other class
        dic2['tc'] is dictionary from term to the term count (only the
            presence of the key is used here, the count itself is ignored)
    @param terms all the terms to score
    @return valuedic dictionary from term to its chi value(the bigger,the better)
        The value is (tc*ntnc - tnc*ntc)**2 / (nt*t) as a float, where
            tc   = docs of class 0 containing the term
            ntc  = docs of class 0 not containing the term
            tnc  = docs of the other class containing the term
            ntnc = docs of the other class not containing the term
            t    = tc + tnc, nt = ntc + ntnc
        A term that occurs in every document or in none gets 0 and a
        "<term>:zero" line is printed for it.
        NOTE(review): this looks like the chi-square statistic with the
        factors that do not depend on the term left out - confirm against
        the referenced paper before comparing scores across corpora.
    """
    # Gather, in a single pass over the corpus, how many documents of each
    # class contain each term, plus the size of each class.
    docs_with_term_c0 = {}
    docs_with_term_c1 = {}
    size_c0 = 0
    size_c1 = 0
    for doc in docdic:
        entry = docdic[doc]
        if entry['pr'] == 0:
            size_c0 += 1
            counts = docs_with_term_c0
        else:
            size_c1 += 1
            counts = docs_with_term_c1
        for seen in entry['tc']:
            counts[seen] = counts.get(seen, 0) + 1

    valuedic = {}
    for term in terms:
        tc = docs_with_term_c0.get(term, 0)
        tnc = docs_with_term_c1.get(term, 0)
        # "Not containing" counts follow from the class sizes.
        ntc = size_c0 - tc
        ntnc = size_c1 - tnc
        t = tc + tnc
        nt = ntc + ntnc
        if nt == 0 or t == 0:
            # The term cannot separate the classes; also avoids dividing by 0.
            print(term + ":zero")
            valuedic[term] = 0
        else:
            diff = tc * ntnc - tnc * ntc
            # float() forces true division on Python 2 as well, so scores
            # are not truncated to integers.
            valuedic[term] = float(diff * diff) / (nt * t)

    return valuedic
